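# File:     descriptive_overtime.R
# Purpose:  Descriptive figures and tables on interruptions over time. The
#           over-time proportion of interruptions was Figure 5 as of 12/15/2021;
#           the script also writes the interrupted-vs-interrupting scatterplot
#           and the balance table.
# Input:    ./data/finalData.RData
# Output:   ./output/figures/overtime_desc.pdf
#           ./output/figures/interruptScatter.pdf
#           ../Paper/Figures/overtime_desc.pdf   (written twice; the later plot wins)
#           ../Paper/Tables/balance_table.tex
# Author:   JB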


# Clear every object from the global environment before loading anything, so
# the script only sees what it loads itself.
rm(list = ls())
# Attach packages. NOTE(review): lme4 is not referenced anywhere in the code
# visible in this script -- confirm it is still needed.
require(lme4)
# require(lfe)
require(tidyverse)
require(ggridges)
# Loading data
# The code below uses `utterance_level` and `finalMerge`; presumably both are
# provided by this .RData file -- TODO confirm.
load('./data/finalData.RData')
# load('./data/finalData_for_NLP.RData')

# Quick look at the available columns and at how the speaker indicators
# (fed / GOP / DEM / Male) combine in the data.
colnames(utterance_level)
utterance_level %>%
  count(fed,GOP,DEM,Male)

# House utterances with `fed` recoded into a party-by-gender label for
# legislators; rows with fed == 0 and neither party flag set are labelled
# 'Expert' and dropped, everything else falls through to 'Fed Chair'.
house_speakers <- utterance_level %>%
  filter(chamber == 'House') %>%
  mutate(fed = ifelse(fed == 0 & GOP == 1 & Male == 1,'GOP - Male',
                      ifelse(fed == 0 & GOP == 1 & Male == 0,'GOP - Female',
                             ifelse(fed == 0 & DEM == 1 & Male == 1,'DEM - Male',
                                    ifelse(fed == 0 & DEM == 1 & Male == 0,'DEM - Female',
                                           ifelse(fed == 0 & GOP == 0 & DEM == 0,'Expert','Fed Chair')))))) %>%
  filter(fed != 'Expert')

# For each speaker group x date x yellenTime cell: mean of `interrupted` and
# `interruptor`, mean utterance length (nchars) and number of utterances (n).
# The two means are then stacked into long format (type / proportion).
summarise_house_hearings <- function(df) {
  df %>%
    group_by(fed,date,yellenTime) %>%
    summarise(interrupted = mean(interrupted),
              nchars = mean(nchars,na.rm = TRUE),
              n = n(),
              interruptor = mean(interruptor)) %>%
    gather(type,proportion,-fed,-date,-yellenTime,-nchars,-n)
}

# Same summary twice: first with legislators collapsed by gender, then with
# legislators collapsed by party. 'Fed Chair' is left untouched in both.
toplot <- bind_rows(
  house_speakers %>%
    mutate(fed = ifelse(grepl('Male',fed),'Male',
                        ifelse(grepl('Female',fed),'Female',fed))) %>%
    summarise_house_hearings(),
  house_speakers %>%
    mutate(fed = ifelse(grepl('GOP',fed),'GOP',
                        ifelse(grepl('DEM',fed),'DEM',fed))) %>%
    summarise_house_hearings()
)
  
# Diagnostic plot of the House series built above: speaker groups in rows,
# measure (interrupted / interruptor) in columns. `grp` pastes the chair era
# (cut at 2014-01-01 and 2018-01-01) onto the speaker group so that the linear
# trend is fitted separately within each era.
era_plot_data <- toplot %>%
  mutate(grp = paste0(ifelse(date < as.Date('2014-01-01'),'preYellen',
                             ifelse(date < as.Date('2018-01-01'),'Yellen','postYellen')),
                      fed))

ggplot(era_plot_data,
       aes(x = date,y = proportion,weight = n,size = n,
           group = factor(grp))) +
  geom_point() +
  # Era boundaries, plus a light shaded band between them
  geom_vline(xintercept = as.Date(c('2014-01-01','2018-01-01'))) +
  annotate(geom = 'rect',
           xmin = as.Date('2014-01-01'),xmax = as.Date('2018-01-01'),
           ymin = -Inf,ymax = Inf,
           alpha = .1,fill = 'grey50') +
  geom_smooth(se = FALSE,method = 'lm',formula = 'y ~ poly(x,1)') +
  facet_grid(fed~type) +
  theme_bw()


# Per speaker type (Legislators vs Fed Chair) x date x yellenTime x chamber:
# share of utterances that were interrupted and share that were interruptions.
#
# `interruptor2` counts an interruption only when the utterance two turns
# earlier was not itself interrupted. The lag is taken within each document
# (docID) so that the first utterances of one transcript are never compared
# with the last utterances of the previous one; where no utterance exists two
# turns earlier, it is treated as "not interrupted" (default = 0) rather than
# NA, which would otherwise turn the whole cell mean into NA.
toplot <- utterance_level %>%
  arrange(docID,ind) %>%
  mutate(fed = ifelse(fed == 0,'Legislators','Fed Chair')) %>%
  group_by(docID) %>%
  mutate(interruptor2 = ifelse(interruptor == 1 & lag(interrupted,2,default = 0) == 0,1,0)) %>%
  ungroup() %>%
  group_by(fed,date,yellenTime,chamber) %>%
  summarise(interrupted = mean(interrupted),
            nchars = mean(nchars,na.rm = TRUE),
            n = n(),
            interruptor = mean(interruptor2)) %>%
  # Long format: one row per cell and measure
  gather(type,proportion,-fed,-date,-yellenTime,-nchars,-n,-chamber) %>%
  # Chair era pasted onto speaker type; used as the trend-line grouping
  mutate(grp = paste0(ifelse(date < as.Date('2014-01-01'),'preYellen',
                             ifelse(date < as.Date('2018-01-01'),'Yellen','postYellen')),fed)) %>%
  mutate(type = ifelse(type == 'interrupted','Speaker is being interrupted',
                       'Speaker is interrupting someone else'))

# Figure: proportion of utterances over time by speaker type, chamber (rows)
# and measure (columns), with a linear trend per chair era.
# The plot object is built first and then printed explicitly inside the PDF
# device: a ggplot is only drawn when printed, and top-level auto-printing does
# not happen when the script is run through source(), which would leave an
# empty file.
overtime_fig <- toplot %>%
  ggplot(aes(x = date,y = proportion,weight = n,size = n,
             label = grp,
             shape = factor(fed),
             color = factor(fed),
             linetype = factor(fed),
             group = factor(grp))) +
  geom_point(alpha = .5) +
  scale_y_continuous(labels = scales::percent,limits = c(0,.45)) +
  geom_vline(xintercept = as.Date(c('2014-01-01','2018-01-01'))) +
  annotate(geom = 'rect',xmin = as.Date('2014-01-01'),xmax = as.Date('2018-01-01'),
           ymin = -Inf,ymax = Inf,
           alpha = .1,fill = 'grey50') +
  geom_smooth(show.legend = FALSE,se = FALSE,method = 'lm',formula = 'y ~ poly(x,1)') +
  facet_grid(chamber~type) +
  scale_color_manual(values = c('grey10','grey50')) +
  scale_shape_manual(values = c(19,15)) +
  scale_linetype_manual(values = c('solid','dashed')) +
  # "Yellen" label: reuse the House / Fed Chair rows of 2016-02-10, relabel
  # them and pin them to the top of the panel (y = Inf, vjust = 1).
  geom_text(data = toplot %>%
              filter(chamber == 'House',
                     grp == 'YellenFed Chair',
                     date == as.Date('2016-02-10')) %>%
              mutate(grp = "Yellen",
                     date = as.Date('2016-01-01'),
                     proportion = Inf),vjust = 1,show.legend = FALSE) +
  # NOTE(review): second shaded band over the same date range as the one above
  # (default fill, alpha .2) -- confirm the double shading is intended.
  annotate(geom = 'rect',xmin = as.Date('2014-01-01'),xmax = as.Date('2018-01-01'),
           ymin = -Inf,ymax = Inf,alpha = .2) +
  theme_bw() +
  theme(legend.position = 'bottom') +
  labs(x = 'Date',y = 'Proportion of utterances',
       color = 'Speaker',shape = 'Speaker',size = '# of utterances',
       linetype = 'Speaker') +
  guides(shape = guide_legend(override.aes = list(size = 5)))

pdf('./output/figures/overtime_desc.pdf',width = 7,height = 5)
print(overtime_fig)
dev.off()

# Print the cleaned text of utterances 20-28 from the House document(s) whose
# docID contains 'fed2015-02', in transcript order.
utterance_level %>%
  arrange(docID,ind) %>%
  filter(grepl('fed2015-02',docID),
         chamber == 'House',
         ind %in% 20:28) %>%
  select(docID,speaker,ind,textclean,interrupted,interruptor) %>%
  pull(textclean)

# Version of the scatterplot?
# One point per opensecretsID: share of own utterances interrupted (x) against
# share of own utterances that interrupt (y), sized by number of utterances.
# `interruptor2` is lagged within each document (docID) so the comparison never
# reaches into the previous transcript; a missing utterance two turns earlier
# is treated as "not interrupted" instead of NA.
utterance_level %>%
  arrange(docID,ind) %>%
  group_by(docID) %>%
  mutate(interruptor2 = ifelse(interruptor == 1 & lag(interrupted,2,default = 0) == 0,1,0)) %>%
  group_by(opensecretsID) %>%
  summarise(n = n(),
            propInterrupted = mean(interrupted),
            propInterruptor = mean(interruptor),
            propInterruptor2 = mean(interruptor2)) %>%
  ggplot(aes(x = propInterrupted,y = propInterruptor,size = n)) +
  geom_point() +
  # Reference lines through the origin with slopes 1 and .35
  geom_abline(slope = 1,intercept = 0) +
  geom_abline(slope = .35,intercept = 0)

# NOTE(review): `xs` is not referenced anywhere else in the visible script.
xs <- c(0,Inf,Inf,0)

# By party & gender
utterance_level %>%
  select(speaker)

# One row per speaker (opensecretsID) and measure:
#   propInterrupted - share of the speaker's utterances that were interrupted
#   value           - share of the speaker's utterances that interrupt, either
#                     all of them (name == 'propInterruptor') or only those
#                     where the utterance two turns earlier was not itself
#                     interrupted (name == 'propInterruptor2')
#   mult            - propInterrupted / value
# Only speakers with more than 20 utterances are kept.
#
# The two-turn lag is computed within each document (docID) so it never reads
# across transcripts; where no utterance exists two turns earlier it is treated
# as "not interrupted" (default = 0) rather than NA.
toplot2 <- utterance_level %>%
  arrange(docID,ind) %>%
  group_by(docID) %>%
  mutate(interruptor2 = ifelse(interruptor == 1 & lag(interrupted,2,default = 0) == 0,1,0)) %>%
  ungroup() %>%
  mutate(fed = ifelse(fed == 0 & GOP == 1 & Male == 1,'GOP - Male',
                      ifelse(fed == 0 & GOP == 1 & Male == 0,'GOP - Female',
                             ifelse(fed == 0 & DEM == 1 & Male == 1,'DEM - Male',
                                    ifelse(fed == 0 & DEM == 1 & Male == 0,'DEM - Female',
                                           ifelse(fed == 0 & GOP == 0 & DEM == 0,'Expert','Fed Chair')))))) %>%
  filter(fed != 'Expert') %>%
  # Collapse legislators to gender only; 'Fed Chair' is left as is
  mutate(fed = ifelse(grepl('Male',fed),'Male',
                      ifelse(grepl('Female',fed),'Female',fed))) %>%
  group_by(opensecretsID,fed) %>%
  summarise(propInterrupted = mean(interrupted),
            propInterruptor = mean(interruptor),
            propInterruptor2 = mean(interruptor2),
            n = n()) %>%
  ungroup() %>%
  filter(n > 20)  %>%
  # After this step the two interruptor measures live in `name` / `value`
  pivot_longer(cols = c('propInterruptor','propInterruptor2')) %>%
  mutate(mult = propInterrupted/value)

# Slope through the origin of the "interrupting" share on the "interrupted"
# share for the Fed-chair row(s) whose opensecretsID contains 'YELLEN'.
# toplot2 is in long format (the pivot_longer step folded propInterruptor and
# propInterruptor2 into `name` / `value`), so the propInterruptor2 measure has
# to be selected through `name` and regressed via `value`; referring to a
# `propInterruptor2` column here fails because that column no longer exists.
lm(value ~ 0 + propInterrupted,
   data = toplot2 %>%
     filter(name == 'propInterruptor2') %>%
     filter(grepl('FED',opensecretsID)) %>%
     filter(grepl('YELLEN',opensecretsID)))

# ggrepel supplies geom_text_repel() below; library() stops immediately if the
# package is missing instead of failing later with the PDF device already open.
library(ggrepel)

# Figure: share interrupted (x) vs. share interrupting (y) per speaker, using
# the propInterruptor2 measure. Fed chairs are drawn in black and labelled with
# their ID (with 'FED' stripped) and the interrupted/interrupting ratio `mult`.
# The plot is built first and printed explicitly so the PDF is also written
# when the script is run through source(), where top-level values are not
# auto-printed.
scatter_fig <- toplot2 %>%
  filter(name == 'propInterruptor2') %>%
  ggplot(aes(x = propInterrupted,y = value,size = n,color = fed == 'Fed Chair')) +
  geom_point() +
  scale_color_manual(name = 'Speaker',values = c('grey60','black')) +
  # 45-degree line: interrupted as often as interrupting
  geom_abline(slope = 1,intercept = 0) +
  geom_text_repel(data = toplot2 %>% filter(grepl('FED',opensecretsID),
                                            name == 'propInterruptor2'),
                  aes(label = paste0(gsub('FED','',opensecretsID),' (x',
                                     round(mult,1),')')),
                  size = 3,color = 'black',min.segment.length = 0,
                  point.padding = .5,box.padding = .5) +
  scale_x_continuous(labels = scales::percent,limits = c(0,.3)) +
  scale_y_continuous(labels = scales::percent,limits = c(0,.3)) +
  labs(title = 'Interrupted vs. Interrupting',
       size = '# Utterances',
       x = '% Interrupted',
       y = '% Interrupting') +
  theme_bw()

pdf('./output/figures/interruptScatter.pdf',width = 7,height = 5)
print(scatter_fig)
dev.off()


# All utterances with `fed` recoded into a party-by-gender label for
# legislators; rows with fed == 0 and neither party flag set are labelled
# 'Expert' and dropped, everything else falls through to 'Fed Chair'.
labelled_speakers <- utterance_level %>%
  mutate(fed = ifelse(fed == 0 & GOP == 1 & Male == 1,'GOP - Male',
                      ifelse(fed == 0 & GOP == 1 & Male == 0,'GOP - Female',
                             ifelse(fed == 0 & DEM == 1 & Male == 1,'DEM - Male',
                                    ifelse(fed == 0 & DEM == 1 & Male == 0,'DEM - Female',
                                           ifelse(fed == 0 & GOP == 0 & DEM == 0,'Expert','Fed Chair')))))) %>%
  filter(fed != 'Expert')

# Round dates to the year, then per year x speaker group x chamber compute the
# mean of `interruptor` and `interrupted` and the utterance count, and stack
# the two means into long format (type / proportion).
summarise_by_year <- function(df) {
  df %>%
    mutate(date = lubridate::round_date(date,unit = 'year')) %>%
    group_by(date,fed,chamber) %>%
    summarise(interruptor = mean(interruptor),
              interrupted = mean(interrupted),
              n = n()) %>%
    ungroup() %>%
    gather(type,proportion,-date,-fed,-n,-chamber)
}

# Same summary twice: legislators collapsed by party first, then by gender.
# 'Fed Chair' is left untouched in both.
toplot <- bind_rows(
  labelled_speakers %>%
    mutate(fed = ifelse(grepl('GOP - ',fed),'GOP',
                        ifelse(grepl('DEM - ',fed),'DEM',fed))) %>%
    summarise_by_year(),
  labelled_speakers %>%
    mutate(fed = ifelse(grepl('Male',fed),'Male',
                        ifelse(grepl('Female',fed),'Female',fed))) %>%
    summarise_by_year()
)
  
# Tile plot of the yearly proportions: years on x, speaker groups on y,
# chamber in rows and measure in columns. The proportion drives both the fill
# and the tile height (rescaled to the range .15-.95).
toplot %>%
  mutate(wdth = scales::rescale(proportion,to = c(.15,.95)),
         date = format(date,'%Y'),
         type = ifelse(type == 'interrupted',
                       'Speaker is being interrupted',
                       'Speaker is interrupting someone else')) %>%
  ggplot(aes(x = factor(date),
             y = factor(fed),
             fill = proportion,
             width = 1,
             height = wdth)) +
  geom_tile() +
  facet_grid(chamber~type) +
  scale_fill_gradient(low = 'grey95',high = 'darkred') +
  # Black frame spanning x positions 13.5 to 17.5 on the discrete year axis
  geom_rect(xmin = 13.5,xmax = 17.5,
            ymin = -Inf,ymax = Inf,
            fill = NA,color = 'black',
            inherit.aes = FALSE,size = 3) +
  labs(x = 'Year',y = 'Speaker',fill = '% of utterances') +
  theme_bw() +
  theme(legend.position = 'bottom')

# Over time
# Rebuild the interruption flags from the raw text in finalMerge:
#   interrupted - this utterance ends in '--'
#   interruptor - the previous row's utterance ends in '--'
#   lagFed      - the previous row's position contains 'Fed Chair'
# The date is parsed from docID after stripping 'fed' and '.txt'. Cells are
# date x party x anyDaughters x gender x position x chamber x lagFed.
toplot <- finalMerge %>%
  mutate(interrupted = as.numeric(grepl('--$',textclean)),
         interruptor = as.numeric(grepl('--$',lag(textclean))),
         date = as.Date(gsub('fed|\\.txt','',docID)),
         anyDaughters = ifelse(nDaughters > 0,1,0),
         lagFed = as.numeric(grepl('Fed Chair',lag(position)))) %>%
  group_by(date,party,anyDaughters,gender,position,chamber,lagFed) %>%
  summarise(n = n(),
            pctInterrupted = mean(interrupted,na.rm = TRUE),
            pctInterruptor = mean(interruptor,na.rm = TRUE),
            .groups = 'drop')

# Figure: per date and chamber, the unweighted mean across cells of
# pctInterrupted, separately for the Fed chair (fed = 1) and everyone else
# (fed = 0).
# The plot is built first and printed explicitly so the PDF is also written
# when the script is run through source(), where top-level values are not
# auto-printed.
# NOTE(review): a later figure in this script is written to the same path and
# overwrites this file.
overtime_lines_fig <- toplot %>%
  mutate(fed = ifelse(position == 'Fed Chair',1,0)) %>%
  group_by(date,fed,chamber) %>%
  summarise(n = sum(n),
            pct = mean(pctInterrupted),
            .groups = 'drop') %>%
  ggplot(aes(x = date,y = pct,group = fed,color = factor(fed),shape = factor(fed),linetype = factor(fed))) +
  geom_point() +
  geom_line() +
  facet_wrap(~chamber)+
  geom_vline(xintercept = as.Date(c('2014-01-01','2018-01-01'))) +
  theme_ridges() +
  scale_color_manual(name = 'Interrupting',values = c('grey50','grey10'),labels = c('Legislators','Fed Chair')) +
  scale_shape_manual(name = 'Interrupting',values = c(19,15),labels = c('Legislators','Fed Chair')) +
  scale_linetype_manual(name = 'Interrupting',values = c('dashed','solid'),labels = c('Legislators','Fed Chair')) +
  theme(legend.position = 'bottom') + xlab('Date') +
  ylab('Proportion of Utterances\nthat are Interruptions')

pdf('../Paper/Figures/overtime_desc.pdf',width = 7,height = 4)
print(overtime_lines_fig)
dev.off()


# Mean of `interruptor` per fedResp x date x chamber. `grp` pastes the chair
# era (cut at 2014-01-01 and 2018-01-01) onto fedResp, and chamber is turned
# into a factor with Senate ordered before House.
toplot <- utterance_level %>%
  group_by(fedResp,date,chamber) %>%
  summarise(interruptor = mean(interruptor,na.rm = TRUE),
            .groups = 'drop') %>%
  mutate(
    grp = paste0(ifelse(date < as.Date('2014-01-01'),'preYellen',
                        ifelse(date < as.Date('2018-01-01'),'Yellen','postYellen')),
                 fedResp),
    chamber = factor(chamber,levels = c('Senate','House'))
  )

# Figure: mean of `interruptor` over time by fedResp, one panel per chamber,
# with a linear trend (and its confidence band) per chair era.
# The plot is built first and printed explicitly so the PDF is also written
# when the script is run through source(), where top-level values are not
# auto-printed.
overtime_fedresp_fig <- toplot %>%
  ggplot(aes(x = date,y = interruptor,color = factor(fedResp),group = grp,
             shape = factor(fedResp),linetype = factor(fedResp),
             label = grp)) +
  geom_point() +
  geom_vline(xintercept = as.Date(c('2014-01-01','2018-01-01'))) +
  annotate(geom = 'rect',xmin = as.Date('2014-01-01'),xmax = as.Date('2018-01-01'),
           ymin = -Inf,ymax = Inf,
           alpha = .1,fill = 'grey50') +
  scale_color_manual(name = 'Interrupting',values = c('grey50','grey10'),labels = c('Legislators','Fed Chair')) +
  scale_shape_manual(name = 'Interrupting',values = c(19,15),labels = c('Legislators','Fed Chair')) +
  scale_linetype_manual(name = 'Interrupting',values = c('dashed','solid'),labels = c('Legislators','Fed Chair'))  +
  # "Yellen" label: reuse the Senate row with grp 'Yellen1' on 2016-02-11,
  # relabel it and pin it to the top of the panel (y = Inf, vjust = 1).
  geom_text(data = toplot %>%
              filter(chamber == 'Senate',
                     grp == 'Yellen1',
                     date == as.Date('2016-02-11')) %>%
              mutate(grp = 'Yellen',
                     date = as.Date('2016-01-01'),
                     interruptor = Inf),vjust = 1,show.legend = FALSE) +
  theme_bw() +
  theme(legend.position = 'bottom') + xlab('Date') +
  ylab('Proportion of Utterances\nthat are Interruptions') +
  facet_grid(chamber~.) +
  geom_smooth(method = 'lm',formula = y ~ poly(x,1))

pdf('../Paper/Figures/overtime_desc.pdf',width = 7,height = 6)
print(overtime_fedresp_fig)
dev.off()

# Diff-in-diff example
# Step 1: rebuild the interruption flags from the raw text and summarise them
# per date x member (opensecretsID) x covariate cell.
member_cells <- finalMerge %>%
  mutate(interrupted = as.numeric(grepl('--$',textclean)),
         interruptor = as.numeric(grepl('--$',lag(textclean))),
         date = as.Date(gsub('fed|\\.txt','',docID)),
         anyDaughters = ifelse(nDaughters > 0,1,0),
         lagFed = as.numeric(grepl('Fed Chair',lag(position)))) %>%
  group_by(date,party,anyDaughters,gender,position,chamber,lagFed,opensecretsID) %>%
  summarise(n = n(),
            pctInterrupted = mean(interrupted,na.rm = TRUE),
            pctInterruptor = mean(interruptor,na.rm = TRUE),
            .groups = 'drop')

# Step 2: keep the cells where the previous row's position was the Fed chair
# (lagFed == 1) and take the utterance-weighted mean of pctInterruptor per
# date x anyDaughters x chamber x chair era.
did_data <- member_cells %>%
  mutate(yellenTime = ifelse(date > as.Date('2014-01-01') & date < as.Date('2018-01-01'),1,0),
         fedChairs = ifelse(date < as.Date('2014-01-01'),'preYellen',
                            ifelse(date > as.Date('2018-01-01'),'Powell','Yellen'))) %>%
  filter(lagFed == 1) %>%
  group_by(date,anyDaughters,chamber,fedChairs) %>%
  # pct must be computed before n is overwritten with its sum
  summarise(pct = weighted.mean(pctInterruptor,w = n),
            n = sum(n),
            .groups = 'drop') %>%
  mutate(grp = paste0(fedChairs,anyDaughters))

# Step 3: weighted linear trend per chair era and daughters group, by chamber.
ggplot(did_data,
       aes(x = date,y = pct,
           color = factor(anyDaughters),
           group = grp,
           weight = n,
           size = n,
           linetype = factor(anyDaughters))) +
  geom_point(shape = 21) +
  geom_smooth(method = 'lm',formula = 'y ~ poly(x,1)',se = FALSE,size = .7) +
  facet_grid(~chamber) +
  geom_vline(xintercept = as.Date(c('2014-01-01','2018-01-01'))) +
  scale_color_manual(name = 'Daughters',values = c('grey50','grey10'),labels = c('None','Any')) +
  scale_linetype_manual(name = 'Daughters',values = c('solid','dashed'),labels = c('None','Any')) +
  scale_size_continuous(name = '# Utterances') +
  theme_ridges() +
  xlab('Date') +
  ylab('Proportion Interrupting Fed Chair')


# Balance table
# Step 1: rebuild the interruption flags, assign each document to a Fed chair
# era by date (cut-offs 2006-01-01, 2014-01-01, 2018-01-01) and keep the
# distinct combinations of the selected columns.
member_rows <- finalMerge %>%
  mutate(interrupted = as.numeric(grepl('--$',textclean)),
         interruptor = as.numeric(grepl('--$',lag(textclean))),
         date = as.Date(gsub('fed|\\.txt','',docID)),
         anyDaughters = ifelse(nDaughters > 0,1,0),
         fedChairs = ifelse(date < as.Date('2006-01-01'),'Greenspan',
                            ifelse(date < as.Date('2014-01-01'),'Bernanke',
                                   ifelse(date > as.Date('2018-01-01'),'Powell','Yellen')))) %>%
  select(opensecretsID,interrupted,interruptor,date,anyDaughters,fedChairs,party,nominate_dim1,age,seniority,
         gender,votepct,votepct_rel,nKids,nSons,nDaughters,firstDaughter,docID) %>%
  distinct()

# Step 2: average within each document (docID), counting its rows as nPols.
per_hearing <- member_rows %>%
  group_by(fedChairs,docID) %>%
  summarise(interrupted = mean(interrupted),
            interruptor = mean(interruptor),
            nPols = n(),
            male = mean(gender == 'M'),
            dem = mean(party == 'D'),
            age = mean(age),
            nominate_dim1 = mean(nominate_dim1),
            seniority = mean(seniority),
            votepct = mean(votepct),
            nKids = mean(nKids),
            nSons = mean(nSons),
            nDaughters = mean(nDaughters),
            firstDaughter = mean(firstDaughter),
            .groups = 'drop')

# Step 3: average the document-level means within each chair era and count the
# documents per era (nHearings). Printed as a plain data.frame.
per_hearing %>%
  group_by(fedChairs) %>%
  summarise(across(c(interrupted,interruptor,nPols),mean),
            nHearings = n(),
            across(c(male,dem,age,nominate_dim1,seniority,votepct,
                     nKids,nSons,nDaughters,firstDaughter),mean),
            .groups = 'drop') %>%
  data.frame()
  


# Boxplot of nominate_dim1 by Fed chair era, drawn over the distinct
# combinations of the selected columns.
nominate_rows <- finalMerge %>%
  mutate(interrupted = as.numeric(grepl('--$',textclean)),
         interruptor = as.numeric(grepl('--$',lag(textclean))),
         date = as.Date(gsub('fed|\\.txt','',docID)),
         anyDaughters = ifelse(nDaughters > 0,1,0),
         fedChairs = ifelse(date < as.Date('2006-01-01'),'Greenspan',
                            ifelse(date < as.Date('2014-01-01'),'Bernanke',
                                   ifelse(date > as.Date('2018-01-01'),'Powell','Yellen')))) %>%
  select(opensecretsID,interrupted,interruptor,date,anyDaughters,fedChairs,party,nominate_dim1,age,seniority,
         gender,votepct,votepct_rel,nKids,nSons,nDaughters,firstDaughter,docID) %>%
  distinct()

ggplot(nominate_rows,aes(x = fedChairs,y = nominate_dim1)) +
  geom_boxplot()

# Input for the balance table: one row per distinct member x document
# combination of the selected covariates, tagged with the Fed chair era.
# The identifying columns are only used for de-duplication and are dropped
# afterwards. All chambers are included.
forBal <- finalMerge %>%
  mutate(interrupted = as.numeric(grepl('--$',textclean)),
         interruptor = as.numeric(grepl('--$',lag(textclean))),
         date = as.Date(gsub('fed|\\.txt','',docID)),
         anyDaughters = ifelse(nDaughters > 0,1,0),
         # 0/1 indicators for Democrats and men
         dem = as.numeric(party == 'D'),
         male = as.numeric(gender == 'M'),
         fedChairs = ifelse(date < as.Date('2006-01-01'),'Greenspan',
                            ifelse(date < as.Date('2014-01-01'),'Bernanke',
                                   ifelse(date > as.Date('2018-01-01'),'Powell','Yellen')))) %>%
  select(opensecretsID,date,anyDaughters,fedChairs,dem,
         nominate_dim1,age,seniority,male,votepct,nKids,nSons,nDaughters,
         firstDaughter,docID) %>%
  distinct() %>%
  select(-opensecretsID,-date,-docID)

# stargazer() writes the table below; library() stops immediately if the
# package is missing instead of failing at the very end.
library(stargazer)

# Format an estimate rounded to 2 decimals, with a '*' appended when its
# p-value is below .05. The result is always character, so the column type
# does not depend on whether any row happens to be significant.
flag_sig <- function(est,pval) {
  rounded <- round(est,2)
  ifelse(pval < .05,paste0(rounded,'*'),as.character(rounded))
}

# Balance table with Yellen as the reference level of the treatment factor.
# NOTE(review): the column names renamed below (variablesBernanke,
# Media_control*, Media_trat*, p_value*) are taken as produced by
# RCT::balance_table() -- confirm against the installed package version.
balTab <- RCT::balance_table(forBal %>%
                               mutate(fedChairs = relevel(factor(fedChairs),ref = 'Yellen')),
                             treatment = 'fedChairs') %>%
  rename(variable = variablesBernanke,
         Yellen = Media_controlBernanke,
         Greenspan = Media_tratGreenspan,
         Powell = Media_tratPowell,
         Bernanke = Media_tratBernanke) %>%
  mutate(Bernanke = flag_sig(Bernanke,p_valueBernanke),
         Greenspan = flag_sig(Greenspan,p_valueGreenspan),
         Powell = flag_sig(Powell,p_valuePowell),
         Yellen = as.character(round(Yellen,2))) %>%
  select(-matches('p_value')) %>%
  # Prefix each variable with its family so that arrange() groups them:
  # political first, then demographic, then family composition.
  mutate(variable = gsub('(.*)(dem|nominate_dim1|votepct)','Pol: \\1\\2',variable),
         variable = gsub('(.*)(age|male|seniority)','Demog: \\1\\2',variable),
         variable = gsub('(.*)(Daughter|Son|Kid)','Family: \\1\\2',variable)) %>%
  arrange(variable) %>%
  select(Variable = variable,Yellen,Greenspan,Bernanke,Powell)

stargazer(balTab,summary = FALSE,out = '../Paper/Tables/balance_table.tex',rownames = FALSE)

# Descriptives of the number of people with a varying % of daughters
# For each opensecretsID containing 'N00': variance of nDaughters across the
# distinct (year, nDaughters) pairs, largest variance first.
finalMerge %>%
  filter(grepl('N00',opensecretsID)) %>%
  distinct(opensecretsID,year,nDaughters) %>%
  group_by(opensecretsID) %>%
  summarise(var = var(nDaughters,na.rm = TRUE)) %>%
  arrange(desc(var))
